library(tidyverse)
library(lubridate)
# library(rjson)
library(themebg)
# Directory holding the daily report snapshots (one CSV per day, with names
# such as "03-30-2020.csv").
data_dir <- "csse_covid_19_data/csse_covid_19_daily_reports"

# Anchor the pattern so only files that end in ".csv" are listed.
f <- list.files(data_dir, pattern = "\\.csv$", full.names = TRUE)

# list.files() returns names in alphabetical order, and month-first names do
# not sort chronologically once more than one year is present. Pick the newest
# report by parsing the date out of each file name instead.
report_dates <- mdy(tools::file_path_sans_ext(basename(f)), quiet = TRUE)
if (all(is.na(report_dates))) {
    stop("No dated daily report CSV files found in ", data_dir, call. = FALSE)
}
latest_report <- f[which.max(report_dates)]

# Most recent daily report.
curr <- read_csv(latest_report) %>%
    arrange(Country_Region, Province_State, Admin2)

# Fixed reference snapshot from 2020-03-30.
prev <- read_csv(file.path(data_dir, "03-30-2020.csv")) %>%
    arrange(Country_Region, Province_State, Admin2)

# Rows of the reference snapshot whose Combined_Key is absent from the latest
# report.
miss <- anti_join(prev, curr, by = "Combined_Key")

# Latest report plus the rows that only exist in the reference snapshot,
# restricted to rows that carry a FIPS code.
# FIPS is stored as a 5-character, zero-padded string: a numeric FIPS such as
# 1001 cannot match a GeoJSON feature id such as "01001".
# NOTE(review): assumes the ids in `counties` are zero-padded strings - confirm.
curr_us <- curr %>%
    bind_rows(miss) %>%
    filter(!is.na(FIPS)) %>%
    mutate(
        FIPS = str_pad(as.character(as.integer(FIPS)), width = 5, pad = "0")
    )

# Location of the county-level GeoJSON file in the plotly datasets repository.
url <- "https://raw.githubusercontent.com/plotly/datasets/master/geojson-counties-fips.json"

# Parsed GeoJSON as a nested list; passed as `geojson` to the choropleth maps.
counties <- rjson::fromJSON(file = url)
# To inspect the id of the first feature: counties$features[[1]]$id
# Read one global time-series CSV and reshape it to long format.
#
# path:       path to a CSV with `Province/State`, `Country/Region`, `Lat`,
#             `Long` columns followed by one column per date.
# value_name: name (string) of the column that receives the pivoted counts.
#
# Returns a tibble with columns state, country, lat, long, date (Date) and
# the column named by `value_name`.
read_global_ts <- function(path, value_name) {
    read_csv(path) %>%
        rename(
            state = `Province/State`,
            country = `Country/Region`,
            lat = Lat,
            long = Long
        ) %>%
        pivot_longer(
            cols = c(-state, -country, -lat, -long),
            names_to = "date",
            values_to = value_name
        ) %>%
        # Column headers are month/day/two-digit-year strings; a time zone is
        # not needed to turn them into plain dates.
        mutate(date = mdy(date))
}

df_confirmed <- read_global_ts(
    "csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv",
    "confirmed"
)

df_deaths <- read_global_ts(
    "csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_global.csv",
    "deaths"
)

df_recovered <- read_global_ts(
    "csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_recovered_global.csv",
    "recovered"
)

# Combine the three measures on explicit keys. The coordinates are dropped
# from the right-hand tables so that floating-point lat/long values are not
# used as join keys and are not duplicated in the result.
# NOTE(review): left joins keep only the state/country rows present in
# df_confirmed; rows that exist only in df_deaths or df_recovered are not
# carried over - verify that this is intended.
ts_keys <- c("state", "country", "date")

df_ts <- df_confirmed %>%
    left_join(select(df_deaths, -lat, -long), by = ts_keys) %>%
    left_join(select(df_recovered, -lat, -long), by = ts_keys)

# Read one US time-series CSV and reshape it to long format.
#
# path:       path to a CSV with identifier columns (including
#             `Province_State`, `Country_Region`, `Long_`, `Combined_Key`)
#             followed by one column per date.
# value_name: name (string) of the column that receives the pivoted counts.
#
# All column names are lower-cased. Only the date-named columns are pivoted,
# so the same code handles files with different sets of identifier columns
# (the deaths file carries an extra population column).
read_us_ts <- function(path, value_name) {
    read_csv(path) %>%
        rename(
            state = `Province_State`,
            country = `Country_Region`,
            long = Long_,
            key = Combined_Key
        ) %>%
        rename_all(str_to_lower) %>%
        pivot_longer(
            cols = matches("^\\d{1,2}/\\d{1,2}/\\d{2,4}$"),
            names_to = "date",
            values_to = value_name
        ) %>%
        # Column headers are month/day/two-digit-year strings; a time zone is
        # not needed to turn them into plain dates.
        mutate(date = mdy(date))
}

df_confirmed_us <- read_us_ts(
    "csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_US.csv",
    "confirmed"
)

df_deaths_us <- read_us_ts(
    "csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_US.csv",
    "deaths"
)

# The join uses every column the two tables share (identifier columns and
# date). `fips` is stored as a 5-character, zero-padded string so it can match
# GeoJSON feature ids such as "01001"; `str_date` is a character copy of the
# date used as the animation frame.
# NOTE(review): assumes the ids in `counties` are zero-padded strings - confirm.
df_ts_us <- left_join(df_confirmed_us, df_deaths_us) %>%
    mutate(
        fips = str_pad(as.character(as.integer(fips)), width = 5, pad = "0"),
        str_date = as.character(date)
    )
# Country-level daily totals: sum every state/province row within a country
# and date, derive active cases, then compute day-over-day changes per country.
# The result stays grouped by country.
df_country <- df_ts %>%
    group_by(country, date) %>%
    summarize(
        confirmed = sum(confirmed, na.rm = TRUE),
        deaths = sum(deaths, na.rm = TRUE),
        recovered = sum(recovered, na.rm = TRUE)
    ) %>%
    ungroup() %>%
    mutate(active = confirmed - deaths - recovered) %>%
    group_by(country) %>%
    # lag() follows row order within each country; the first row of every
    # country therefore gets NA.
    mutate(
        new_cases = confirmed - lag(confirmed),
        new_deaths = deaths - lag(deaths)
    )

# Earliest date on which each country has more than zero confirmed cases.
df_case1 <- df_country %>%
    group_by(country) %>%
    filter(confirmed > 0) %>%
    arrange(date, country) %>%
    # Rows are in date order, so the first row kept per country is the earliest.
    distinct(country, .keep_all = TRUE) %>%
    select(country, date) %>%
    rename(date_case1 = date)

# Earliest date on which each country has more than 50 confirmed cases.
# NOTE(review): the comparison is strict (> 50), so a day with exactly 50
# cases does not count - confirm this matches the intended threshold.
df_case50 <- df_country %>%
    group_by(country) %>%
    filter(confirmed > 50) %>%
    arrange(date, country) %>%
    distinct(country, .keep_all = TRUE) %>%
    select(country, date) %>%
    rename(date_case50 = date)
    
# Country-level series with the threshold dates attached and the elapsed days
# since each threshold computed as plain numbers.
df_data <- df_country %>%
    left_join(df_case1) %>%
    left_join(df_case50) %>%
    mutate(
        # Negative values mark dates before the threshold was reached.
        day_case1 = as.numeric(difftime(date, date_case1, units = "days")),
        day_case50 = as.numeric(difftime(date, date_case50, units = "days")),
        # Flag for rows whose country is "US".
        usa = country == "US",
        # Character copy of the date, used as the animation frame key.
        str_date = as.character(date)
    )

# One row per country taken from its most recent date: sorting newest-first
# makes distinct() keep the latest row of each country.
latest_by_country <- df_data %>%
    arrange(desc(date), country) %>%
    distinct(country, .keep_all = TRUE) %>%
    ungroup()

# Countries with the 25 and the 12 highest confirmed counts on their latest date.
df_top25 <- top_n(latest_by_country, 25, confirmed)

df_top12 <- top_n(latest_by_country, 12, confirmed)

# Plotting base: rows with at least one confirmed case, plus the death rate as
# a percentage of confirmed cases. The rate is computed element-wise, so no
# per-row grouping is needed; the result is ungrouped.
df_plt <- df_data %>%
    filter(confirmed > 0) %>%
    mutate(death_rate = deaths / confirmed * 100) %>%
    ungroup()

# Calendar-time series restricted to the top-12 countries.
df_plt_top12 <- df_plt %>%
    semi_join(df_top12, by = "country")

# Series aligned on days since the first confirmed case.
df_plt_day1 <- df_data %>%
    filter(day_case1 >= 0)

df_plt_day1_top25 <- df_plt_day1 %>%
    semi_join(df_top25, by = "country")

df_plt_day1_top12 <- df_plt_day1 %>%
    semi_join(df_top12, by = "country")

# Series aligned on days since the case count first exceeded 50.
df_plt_day50 <- df_data %>%
    filter(day_case50 >= 0)

df_plt_day50_top25 <- df_plt_day50 %>%
    semi_join(df_top25, by = "country")
library(plotly)

# Top 12 countries by confirmed cases

# Line chart of cumulative confirmed cases by calendar date, one line per
# top-12 country. The legend is hidden; the country name appears on hover.
plot_ly(
    data = df_plt_top12,
    x = ~date,
    y = ~confirmed,
    color = ~country,
    colors = "Paired"
) %>%
    add_lines(hovertext = ~country) %>%
    layout(showlegend = FALSE)

# Caption (plot above): Confirmed cases by country

# Line chart of cumulative deaths by calendar date, one line per top-12
# country. The legend is hidden; the country name appears on hover.
plot_ly(
    data = df_plt_top12,
    x = ~date,
    y = ~deaths,
    color = ~country,
    colors = "Paired"
) %>%
    add_lines(hovertext = ~country) %>%
    layout(showlegend = FALSE)

# Caption (plot above): Deaths by country

# Line chart of the death rate (deaths as a percentage of confirmed cases) by
# calendar date, one line per top-12 country.
plot_ly(
    data = df_plt_top12,
    x = ~date,
    y = ~death_rate,
    color = ~country,
    colors = "Paired"
) %>%
    add_lines(hovertext = ~country) %>%
    layout(showlegend = FALSE)

# Caption (plot above): Death rate by country

# Line chart of cumulative confirmed cases against days elapsed since each
# country's first confirmed case, one line per top-12 country.
plot_ly(
    data = df_plt_day1_top12,
    x = ~day_case1,
    y = ~confirmed,
    color = ~country,
    colors = "Paired"
) %>%
    add_lines(hovertext = ~country) %>%
    layout(showlegend = FALSE)

# Caption (plot above): Confirmed cases since first case reported in each country

# Line chart of cumulative deaths against days elapsed since each country's
# first confirmed case, one line per top-12 country.
plot_ly(
    data = df_plt_day1_top12,
    x = ~day_case1,
    y = ~deaths,
    color = ~country,
    colors = "Paired"
) %>%
    add_lines(hovertext = ~country) %>%
    layout(showlegend = FALSE)

# Caption (plot above): Deaths since first case reported in each country

# Bar chart of the day-over-day change in confirmed cases by calendar date,
# one bar series per top-12 country.
plot_ly(
    data = df_plt_top12,
    x = ~date,
    y = ~new_cases,
    color = ~country,
    colors = "Paired"
) %>%
    add_bars(hovertext = ~country) %>%
    layout(showlegend = FALSE)

# Caption (plot above): New cases reported each day by country

# Bar chart of the day-over-day change in deaths by calendar date, one bar
# series per top-12 country.
plot_ly(
    data = df_plt_top12,
    x = ~date,
    y = ~new_deaths,
    color = ~country,
    colors = "Paired"
) %>%
    add_bars(hovertext = ~country) %>%
    layout(showlegend = FALSE)

# Caption (plot above): New deaths reported each day by country

# Smoothed trend of daily new cases against days since the first confirmed
# case, one curve per top-12 country. Line size is mapped to the `usa` flag
# with a size range of 0 to 1.5 (presumably to make the US curve stand out).
ggplot(
    data = df_plt_day1_top12,
    mapping = aes(x = day_case1, y = new_cases, color = country, size = usa)
) +
    geom_smooth(se = FALSE) +
    scale_color_brewer(NULL, palette = "Paired") +
    scale_size_discrete(NULL, range = c(0, 1.5)) +
    theme_bg() +
    theme(legend.position = "none")
# Smoothed trend of daily new deaths against days since the first confirmed
# case, one curve per top-12 country. Line size is mapped to the `usa` flag
# with a size range of 0 to 1.5 (presumably to make the US curve stand out).
ggplot(
    data = df_plt_day1_top12,
    mapping = aes(x = day_case1, y = new_deaths, color = country, size = usa)
) +
    geom_smooth(se = FALSE) +
    scale_color_brewer(NULL, palette = "Paired") +
    scale_size_discrete(NULL, range = c(0, 1.5)) +
    theme_bg() +
    theme(legend.position = "none")
# Animated world map: one marker per country located by country name, sized
# by confirmed cases, with one animation frame per date string.
plot_ly(
    data = df_plt,
    type = "scattergeo",
    locationmode = "country names",
    locations = ~country,
    size = ~confirmed,
    frame = ~str_date,
    showlegend = FALSE
) %>%
    layout(
        geo = list(
            projection = list(type = "natural earth")
        )
    )

# Caption (plot above): Confirmed cases by country over time

# County choropleth of confirmed cases from the latest daily report. Counties
# are drawn from the `counties` GeoJSON and located by FIPS code; the colour
# range is fixed at 0-100 via zmin/zmax.
# NOTE(review): the FIPS values must have the same format as the feature ids
# in `counties` for a county to be coloured - confirm.
plot_ly(
    data = curr_us,
    type = "choropleth",
    geojson = counties,
    locations = ~FIPS,
    text = ~Combined_Key,
    z = ~Confirmed,
    zmin = 0,
    zmax = 100,
    colorscale = "Reds",
    showlegend = FALSE
) %>%
    layout(
        geo = list(
            projection = list(type = "albers usa")
        )
    )

# Caption (plot above): Confirmed cases by US county

# County choropleth of deaths from the latest daily report. Counties are drawn
# from the `counties` GeoJSON and located by FIPS code; the colour range is
# fixed at 0-10 via zmin/zmax.
# NOTE(review): the FIPS values must have the same format as the feature ids
# in `counties` for a county to be coloured - confirm.
plot_ly(
    data = curr_us,
    type = "choropleth",
    geojson = counties,
    locations = ~FIPS,
    text = ~Combined_Key,
    z = ~Deaths,
    zmin = 0,
    zmax = 10,
    colorscale = "Reds",
    showlegend = FALSE
) %>%
    layout(
        geo = list(
            projection = list(type = "albers usa")
        )
    )

# Caption (plot above): Deaths by US county

# df_ts_us %>%
#     plot_ly(
#         type = "scattergeo",
#         locationmode = "country names", 
#         locations = ~country, 
#         size = ~confirmed,
#         frame = ~str_date,
#         showlegend = FALSE
#     ) %>%
#     layout(geo = list(projection = list(type = "natural earth")))

# Animated county choropleth of confirmed cases from the US time series, with
# one animation frame per date string. Counties are drawn from the `counties`
# GeoJSON and located by fips code; the colour range is fixed at 0-100.
# NOTE(review): the fips values must have the same format as the feature ids
# in `counties` for a county to be coloured - confirm.
plot_ly(
    data = df_ts_us,
    type = "choropleth",
    geojson = counties,
    locations = ~fips,
    text = ~key,
    z = ~confirmed,
    zmin = 0,
    zmax = 100,
    colorscale = "Reds",
    frame = ~str_date,
    showlegend = FALSE
) %>%
    layout(
        geo = list(
            projection = list(type = "albers usa")
        )
    )